# devtools::install_local("geneRefineR/", force = T)
# library("echolocatoR") 
source("echolocatoR/R/functions.R") 

library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt) 
# Ensembl LD API
library(httr)
library(jsonlite)
library(xml2)
library(gaston)
library(RCurl)
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] susieR_0.6.2.0411  tidyr_0.8.1        RCurl_1.95-4.11   
##  [4] bitops_1.0-6       gaston_1.5.4       RcppParallel_4.4.2
##  [7] Rcpp_1.0.0         xml2_1.2.0         jsonlite_1.5      
## [10] httr_1.3.1         biomaRt_2.38.0     curl_3.2          
## [13] ggrepel_0.8.0      cowplot_0.9.3      plotly_4.7.1      
## [16] ggplot2_3.1.0      dplyr_0.7.6        data.table_1.11.8 
## [19] DT_0.4             readxl_1.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-35      prettyunits_1.0.2    assertthat_0.2.0    
##  [4] rprojroot_1.3-2      digest_0.6.18        R6_2.4.0            
##  [7] cellranger_1.1.0     plyr_1.8.4           backports_1.1.2     
## [10] stats4_3.5.1         RSQLite_2.1.1        evaluate_0.11       
## [13] pillar_1.3.1         rlang_0.3.1          progress_1.2.0      
## [16] lazyeval_0.2.1       blob_1.1.1           S4Vectors_0.20.1    
## [19] Matrix_1.2-14        rmarkdown_1.10       stringr_1.4.0       
## [22] htmlwidgets_1.2      bit_1.1-14           munsell_0.5.0       
## [25] compiler_3.5.1       pkgconfig_2.0.2      BiocGenerics_0.28.0 
## [28] htmltools_0.3.6      tidyselect_0.2.4     expm_0.999-2        
## [31] tibble_2.0.1         IRanges_2.16.0       XML_3.98-1.12       
## [34] viridisLite_0.3.0    crayon_1.3.4         withr_2.1.2         
## [37] grid_3.5.1           gtable_0.2.0         DBI_1.0.0           
## [40] magrittr_1.5         scales_1.0.0         stringi_1.3.1       
## [43] bindrcpp_0.2.2       tools_3.5.1          bit64_0.9-7         
## [46] Biobase_2.42.0       glue_1.3.0           purrr_0.2.5         
## [49] hms_0.4.2            parallel_3.5.1       yaml_2.1.19         
## [52] AnnotationDbi_1.44.0 colorspace_1.3-2     memoise_1.1.0       
## [55] knitr_1.20           bindr_0.1.1
print(paste("susieR ", packageVersion("susieR")))  
## [1] "susieR  0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
# root <- "../../.."
root <- "~/../../../sc/orga/projects"
Data_dirs = list(
  # ++++++++ GWAS SUMMARY STATS ++++++++ # 
  # Nall et al. (2019) w/ 23andMe
  "Nalls_23andMe" = list(type="Parkinson's GWAS", 
                         topSS="Data/Parkinsons/Nalls_23andMe/Nalls2019_TableS2.xlsx",
                   # fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/23andme/PD_all_post30APRIL2015_5.2_extended.txt")),
                   fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/combined_meta/nallsEtAl2019_allSamples_allVariants.mod.txt"),
                   reference="https://www.biorxiv.org/content/10.1101/388165v3"),  
  
  ## IGAP 
  "IGAP" = list(type="Alzheimer's GWAS", 
                topSS="Data/Alzheimers/IGAP/Lambert_2019_AD_GWAS.xlsx",
                fullSS=file.path(root,"ad-omics/data/AD_GWAS/IGAP/IGAP_stage_1.1000G.phase3.20130502.tsv"),
                reference="https://www.nature.com/articles/ng.2802"),
  
  ## Marioni et al. (2018) 
  "Marioni_2018" = list(type="Alzheimer's GWAS", 
                        topSS="Data/Alzheimers/Marioni_2018/Marioni2018_supplementary_tables.xlsm",
                        fullSS=file.path(root,"ad-omics/data/AD_GWAS/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv"),
                        reference="https://www.nature.com/articles/s41398-018-0150-6"),
  
  ## Jansen et al. (2018) 
  "Posthuma_2018" = list(type="Alzheimer's GWAS", 
                         topSS="Data/Alzheimers/Posthuma_2018/AD_target_SNP.xlsx", 
                         fullSS=file.path(root,"ad-omics/data/AD_GWAS/Posthuma_2018/phase3.beta.se.hrc.txt"),
                reference="https://www.nature.com/articles/s41588-018-0311-9"),
  
  ## Kunkle et al. (2018) Alzheimer's GWAS 
  "Kunkle_2019" = list(type="Alzheimer's GWAS",
                       topSS="Data/Alzheimers/Kunkle_2019/Kunkle2019_supplementary_tables.xlsx", 
                       fullSS=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_2019/Kunkle_etal_Stage1_results.txt"),
                       # fullSS_stage2=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_etal_Stage2_results.txt"),
                       reference="https://www.nature.com/articles/s41588-019-0358-2"),

  # ++++++++ eQTL SUMMARY STATS ++++++++ #
  ## MESA eQTLs: African Americans
  "MESA_AFA" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/AFA_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/AFA_cis_eqtl_summary_statistics.txt"), 
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
  ## MESA eQTLs: Caucasians
  "MESA_CAU" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/CAU_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/CAU_cis_eqtl_summary_statistics.txt"),
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
  ## MESA eQTLs: Hispanics
  "MESA_HIS" = list(type="eQTL",
                    topSS="Data/eQTL/MESA/HIS_eQTL_PTK2B.txt",
                    fullSS=file.path(root,"ad-omics/data/mesa/HIS_cis_eqtl_summary_statistics.txt"),
                    reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa") 
  )

Data_dirs_table <- Data_dirs_to_table(Data_dirs, writeCSV = "directories_table.csv")

# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
  vcf_folder = F # Use internet if I'm working from my laptop
} else{
  vcf_folder = F#"../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
}  

Parkinson’s Disease

Nalls et. al. (2019) w/ 23andMe

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Nalls_23andMe$topSS, 
  chrom_col = "CHR", position_col = "BP", snp_col="SNP",
  pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
  caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")


finemapped_PD_Nalls_23andMe <- finemap_geneList(top_SNPs, geneList=c("LRRK2"),#c("LRRK2","GBAP1","SNCA","VPS13C","GCH1"),
                 file_path= Data_dirs$Nalls_23andMe$fullSS,#"./LRRK2_EUR_subset.txt",#
                 chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
                 pval_col = "p", effect_col = "beta", stderr_col = "se", 
                 vcf_folder = vcf_folder, superpopulation = "EUR")

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_EUR_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4819.31771667441” [1] “objective:-4817.53694272212” [1] “objective:-4817.5346354578” [1] “objective:-4817.53463247054” [1] “objective:-4817.5346324668” [1] “objective:-4817.53463246679”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

Alzheimer’s Disease

Posthuma (2018)

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Posthuma_2018$topSS,
  sheet = "Overall (phase 3)",
  chrom_col = "Chr", position_col = "bp", snp_col="SNP",
  pval_col="P", effect_col="Z", gene_col="Gene",
  caption= "Posthuma et al. (2018) AD GWAS Summary Stats")

finemapped_AD_Posthuma <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
                 file_path=Data_dirs$Posthuma_2018$fullSS,#"Data/Alzheimers/Posthuma_2018/phase3.beta.se.hrc.txt",#
                 effect_col = "BETA", stderr_col = "SE", position_col = "BP",
                 snp_col = "SNP", pval_col = "P",
                 vcf_folder = vcf_folder, superpopulation = "EUR")

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Posthuma_2018/PTK2B_EUR_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-6157.44090915229” [1] “objective:-6157.0855671831” [1] “objective:-6157.0854402076” [1] “objective:-6157.08544016219”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

Marioni (2018)

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Marioni_2018$topSS,
  sheet = "Table S9",
  chrom_col = "topSNP_chr", position_col = "topSNP_bp", snp_col="topSNP",
  pval_col="p_GWAS", effect_col="b_GWAS", gene_col="Gene",
  caption= "Marioni et al. (2018) AD GWAS Summary Stats")

finemapped_AD_Marioni <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
                 file_path=Data_dirs$Marioni_2018$fullSS,#"Data/Alzheimers/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv", #,
                 effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                 snp_col = "SNP",chrom_col = "CHROM", pval_col = "PVAL",
                 vcf_folder = vcf_folder, superpopulation = "EUR")

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Marioni_2018/PTK2B_EUR_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3136.01873136029” [1] “objective:-3134.8397689979” [1] “objective:-3134.83862729993” [1] “objective:-3134.83862618178” [1] “objective:-3134.8386261807” [1] “objective:-3134.8386261807”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 572 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 572 rows containing missing values (geom_segment).
## Warning: Removed 7 rows containing missing values (geom_text_repel).

IGAP

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$IGAP$topSS,
  sheet = 1,
  chrom_col = "CHR", position_col = "Position", snp_col="SNP",
  pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
  caption= "IGAP: AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

finemapped_AD_IGAP <- finemap_geneList(top_SNPs, geneList=c("PTK2B"), 
                   file_path=Data_dirs$IGAP$fullSS,
                   chrom_col = "CHROM",  pval_col = "PVAL", snp_col = "SNP",
                   effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR")

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/IGAP/PTK2B_EUR_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3041.09565605669” [1] “objective:-3040.53825357985” [1] “objective:-3040.5378253415” [1] “objective:-3040.53782501108”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_segment).
## Warning: Removed 8 rows containing missing values (geom_text_repel).

Kunkle (2019)

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Kunkle_2019$topSS,
  sheet = "Supplementary Table 6",
  chrom_col = "Chr", position_col = "Basepair", snp_col="SNP",
  pval_col="P", effect_col="Beta", gene_col="LOCUS",
  caption= "Kunkle (2019): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

finemapped_AD_Kunkle <- finemap_geneList(top_SNPs, geneList=c("PTK2B"), 
                   file_path="Data/Alzheimers/Kunkle_2019/Kunkle_etal_Stage2_results.txt",#Data_dirs$Kunkle_2019$fullSS,
                   chrom_col = "Chromosome",  position_col = "Position", pval_col = "Pvalue",
                   snp_col = "MarkerName", effect_col = "Beta", stderr_col = "SE", 
                   vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR")

eQTL

  • AFR = AFA = African
  • AMR = Ad Mixed American
  • EAS = East Asian
  • EUR = CAU = European
  • SAS = South Asian

MESA - AFA

superpopulation <- "AFA"
finemapped_eQTL_MESA.AFA <- finemap_eQTL(superpopulation, gene =  "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_AFA$fullSS)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-6410.83082343629” [1] “objective:-6410.73262589551” [1] “objective:-6410.73258329645” [1] “objective:-6410.73258327796”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - CAU

superpopulation <- "CAU"
finemapped_eQTL_MESA.CAU <- finemap_eQTL(superpopulation, gene = "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_CAU$fullSS)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-3454.00148726064” [1] “objective:-3453.35330042907” [1] “objective:-3453.3528459927” [1] “objective:-3453.35284567247”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - HIS

superpopulation <- "HIS"
try({
  finemapped_eQTL_MESA.HIS <- finemap_eQTL(superpopulation, gene = "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_HIS$fullSS)
})

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt …

## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
  • Fine mapping with SusieR… [1] “objective:-4639.57322734461” [1] “objective:-4639.47138123614” [1] “objective:-4639.47122903255” [1] “objective:-4639.47122880477”

Credible Set:

Merge Results

try({
  # finemapped_eQTL_MESA.AFA <- finemapped_eQTL_MESA.HIS<- finemapped_eQTL_MESA.CAU
named_results_list <- list("Nalls_23andMe"=finemapped_PD_Nalls_23andMe, 
                           "IGAP"=finemapped_AD_IGAP,
                           "Posthuma_2018"=finemapped_AD_Posthuma,
                           "Marioni_2018"=finemapped_AD_Marioni,
                           "Kunkle_2019"=finemapped_AD_Kunkle,
                           "eQTL_MESA.AFA"=finemapped_eQTL_MESA.AFA, 
                           "eQTL_MESA.CAU"=finemapped_eQTL_MESA.CAU, 
                           "eQTL_MESA.HIS"=finemapped_eQTL_MESA.HIS)
merged_results <- merge_finemapping_results(named_results_list, csv_path ="merged_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")

table(as.character(merged_results$SNP))
})